
#########################
### K means functions ###
#########################

update_delta_theta <- function(gamma, e_data, K, spline){
  # the order is Y, id, time, treatment timing, weight, X
  N0 <- max(e_data[,2])
  T0 <- max(e_data[,3])
  # p: number of control covariates
  p <- ncol(e_data) - 5
  
  # take type estimates as given
  e_data_k <- gamma[e_data[,2]]
  
  # spline length; if using full time fixed-effect, tau=T0
  tau <- length(spline(1))
  
  # construct matrix with control covariates and splines
  X <- matrix(0,N0*T0,p+K*tau) 
  if (p>0){
    X[,1:p] <- e_data[,1:p+5]  
  }
  for (i in 1:nrow(X)){
    t <- e_data[i,3]
    k <- e_data_k[i]
    
    # splines
    X[i,p+tau*(k-1)+1:tau] <- spline(t)
  }
  
  # update delta and theta with OLS
  OLS <- solve(t(X)%*%diag(e_data[,5])%*%X)%*%t(X)%*%diag(e_data[,5])%*%e_data[,1]  
  delta_updated <- t(matrix(OLS[1:(K*tau)+p],tau,K))
  rownames(delta_updated) <- 1:K
  colnames(delta_updated) <- 1:tau
  if (p>0){
    theta_updated <- OLS[1:p]  
  } else if (p==0){
    theta_updated <- 0
  }
  
  # return updated delta and theta with some summary statistics
  summary <- cbind(1:K, sapply(1:K, function(k) sum(gamma==k)) )
  colnames(summary) <- c("type", "Nk")
  ans <- list(delta=delta_updated,
              theta=theta_updated,
              summary=summary)
  return(ans)
}

update_type <- function(delta, theta, e_data, K, spline){
  # the order is Y, id, time, treatment timing, weight, X
  N0 <- max(e_data[,2])
  T0 <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  # update type estimates
  gamma_updated <- rep(0,N0)
  SSE_full <- rep(0,N0)
  
  if (p==0){
    
    for (i in unique(e_data[,2])){
      time_index <- (e_data[,3])[e_data[,2]==i]
      SSE <- rep(0,K)
      
      for (k in 1:K){
        deltak <- matrix(t(sapply(time_index,spline)),T0)%*%delta[k,]
        SSE[k] <- sum((e_data[e_data[,2]==i,1] - deltak)^2)
      }
      gamma_updated[i] <- which.min(SSE)
      SSE_full[i] <- min(SSE)
    }
    
  } else if (p>0){
    
    for (i in unique(e_data[,2])){
      time_index <- (e_data[,3])[e_data[,2]==i]
      SSE <- rep(0,K)
      residual <- e_data[,1] - e_data[,1:p+5]%*%theta
      
      for (k in 1:K){
        deltak <- matrix(t(sapply(time_index,spline)),T0)%*%delta[k,]
        SSE[k] <- sum((residual[e_data[,2]==i] - deltak)^2)
      }
      gamma_updated[i] <- which.min(SSE)
      SSE_full[i] <- min(SSE)
    }
  }
  
  # return updated type estimates with minimized objective function
  ans <- list(gamma=gamma_updated, 
              MSE=sum(SSE_full)/(N0*T0))
  return(ans)
}

kmeans_fun_once <- function(initial_gamma, e_data, K, eps, maxiter, spline){
  # the order is Y, id, time, treatment timing, weight, X
  N0 <- max(e_data[,2])
  T0 <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  temp1 <- update_delta_theta(initial_gamma, e_data, K, spline)
  temp2 <- update_type(temp1$delta, temp1$theta, e_data, K, spline)
    
  gamma_old <- initial_gamma
  gamma_new <- temp2$gamma
    
  estimates_new <- c(temp1$delta, temp1$theta)
  estimates_old <- estimates_new - eps - 1

  noi <- 1
  MSE <- temp2$MSE
  
  while (max(abs(estimates_new-estimates_old)) > eps & 
         noi < maxiter & 
         sum(gamma_old!=gamma_new) > 0){
    
    if (length(unique(gamma_new))==K){
      
      estimates_old <- estimates_new
      gamma_old <- gamma_new 
      
      temp1 <- update_delta_theta(gamma_old, e_data, K, spline)
      temp2 <- update_type(temp1$delta, temp1$theta, e_data, K, spline)
      
      estimates_new <- c(temp1$delta, temp1$theta)
      gamma_new <- temp2$gamma
      
    } else {
      
      tempK <- length(unique(gamma_new))
      tempDelta <- matrix(estimates_new[1:(length(estimates_new)-p)],K)
      
      estimates_old <- estimates_new
      gamma_old <- sapply(gamma_new, function(x) which(unique(gamma_new)==x))
      
      temp1 <- update_delta_theta(gamma_old, e_data, tempK, spline)
      temp2 <- update_type(matrix(rbind((temp1$delta),
                                        as.matrix(tempDelta[(1:K)[!(1:K %in% unique(gamma_new))],])),
                                  K), 
                           temp1$theta, e_data, K, spline)
      
      estimates_new <- c(matrix(rbind((temp1$delta),
                                      as.matrix(tempDelta[(1:K)[!(1:K %in% unique(gamma_new))],])),
                                K),
                         temp1$theta)
      gamma_new <- temp2$gamma
      
    }
    
    MSE <- c(MSE, temp2$MSE)
    noi <- noi + 1
  }
  
  ans <- list(delta=temp1$delta, 
              theta=temp1$theta, 
              gamma=temp2$gamma, MSE=MSE, noi=noi, summary=temp1$summary)
  return(ans)
}

kmeans_fun <- function(n_of_initial_values, e_data, K, eps, maxiter, spline){
  # the order is Y, id, time, treatment timing, weight, X
  N0 <- max(e_data[,2])
  
  # try different initial values
  result <- matrix(0,N0+1,n_of_initial_values)
  for (t in 1:n_of_initial_values){
    init <- ceiling(runif(N0,0,K))
    while (length(unique(init))<K){
      init <- ceiling(runif(N0,0,K))
    }
    kmeans <- kmeans_fun_once(init,e_data,K,eps,maxiter,spline)
    result[,t] <- c(init,kmeans$MSE[length(kmeans$MSE)])
  }
  
  # find initial value with minimized objective function
  init <- result[1:N0,which.min(result[N0+1,])]
  kmeans <- kmeans_fun_once(init,e_data,K,eps,maxiter,spline)
  
  return(kmeans)
}

retrieve_delta <- function(T, delta, spline){
  ans <- sapply(1:T, 
                function(t) apply(matrix(t(sapply(1:t,spline)),t)%*%t(delta),2,sum))
  return(ans)
}

extrapolate_gamma <- function(T, kmeans, e_data, spline){
  # the order is Y, id, time, treatment timing, weight, X
  N <- max(e_data[,2])
  K <- max(kmeans$gamma)
  delta <- kmeans$delta
  theta <- kmeans$theta
  p <- ncol(e_data)-5
  
  # extrapolate types for treated units
  type_assignment <- rep(0,N)
  for (i in 1:N){
    Ti <- min(T,mean(e_data[e_data[,2]==i,4])-1)
    
    if (p==0){
      
      index <- e_data[,2]==i & e_data[,3]<=Ti & e_data[,3]>0
      time_index <- e_data[index,3]
      SSE <- rep(0,K)
      
      for (k in 1:K){
        deltak <- matrix(t(sapply(time_index,spline)),length(time_index))%*%delta[k,]
        SSE[k] <- sum((e_data[index,1] - deltak)^2)
      }
      type_assignment[i] <- which.min(SSE)
      
    } else if (p>0){
      
      index <- e_data[,2]==i & e_data[,3]<=Ti & e_data[,3]>0
      time_index <- e_data[index,3]
      SSE <- rep(0,K)
      residual <- e_data[index,1] - as.matrix(e_data[index,1:p+5])%*%theta
      
      for (k in 1:K){
        deltak <- matrix(t(sapply(time_index,spline)),length(time_index))%*%delta[k,]
        SSE[k] <- sum((residual - deltak)^2)
      }
      type_assignment[i] <- which.min(SSE)
      remove(residual)
    }
  }
  
  ### reorder types 
  typeMap <- rep(0,K)
  # order types to be decreasing in first-order coefficients
  # typeMap = c(2,3,1) means type 2 becomes type 1, type 3 becomes type 2, type 1 becomes type 3 
  typeMap[1:K] <- order(kmeans$delta%*%spline(min(e_data[,3])),decreasing=TRUE)
  
  # treatment status
  treatment <- sapply(1:N,function(i) mean(e_data[e_data[,2]==i,4])<=T)
  
  ### Type classification results for all units
  kmeans <- list(delta=matrix(kmeans$delta[typeMap,],K),
                 theta=kmeans$theta,
                 gamma=sapply(type_assignment, function(x) which(typeMap==x)),
                 MSE=kmeans$MSE,
                 noi=kmeans$noi,
                 summary=cbind(1:K,
                               sapply(typeMap,function(k) sum(type_assignment==k & treatment==1)),
                               sapply(typeMap,function(k) sum(type_assignment==k & treatment==0)) )
  )
  return(kmeans)
}

############
### CATT ###
############

estFullCATT <- function(gamma,Pi,L,e_data,t,k,e){
  # gamma is N by 1 vector for type assignment
  
  # Pi is N by (1+p_pi) matrix of propensity score estimates
  # p_pi is the dimension of parameters in the propensity score model
  # the first column Pi is the propensity score ratio
  # the rest of the columns of Pi is the first derivative of the propensity score ratio
  
  # L is N by p_pi matrix of scores in the linear approximation for propensity score estimator
  
  # when there is no control covariate Xi, 
  # the every column except for the first of Pi and all of L are zeros.   
  
  # e_data is N*T by (5+p) matrix for Y and X
  # p is the dimension of X
  # the order is Y, id, time, treatment timing, weight, X
  
  # create vector of outcome differences and treatment timing
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  Ydiff <- sapply(1:N, 
                  function(i) e_data[e_data[,2]==i & e_data[,3]==t,1] - 
                    e_data[e_data[,2]==i & e_data[,3]==e-1,1] )
  E <- sapply(1:N, function(i) mean(e_data[e_data[,2]==i,4]))
  
  # estimate mu(k,e)
  mu <- sum(gamma==k & E==e)/N
  
  # estimate E[(Yt - Ye-1)1{k,e}]
  muY <- sum(Ydiff[gamma==k & E==e])/N
  
  # estimate E[W]
  muW <- sum((Pi[,1])[gamma==k & E>t])/N
  
  # estimate E[(Yt - Ye-1)W]
  muYW <- sum((Ydiff*Pi[,1])[gamma==k & E>t])/N
  
  # estimate B1bar
  B1 <- (1/muW)*apply(as.matrix((Ydiff*as.matrix(Pi[,2:ncol(Pi)]))[gamma==k & E>t,]),
                      2,sum)/N
  
  # estimate B2bar
  B2 <- (1/muW)*apply(as.matrix(Pi[gamma==k & E>t,2:ncol(Pi)]),2,sum)/N
  
  # compute estimate and the vector of scores
  estimate <- muY/mu - muYW/muW
  score <- (gamma==k & E==e)/mu*(Ydiff - muY/mu) - 
    (gamma==k & E>t)*Pi[,1]/muW*(Ydiff - muYW/muW) - L%*%t(B1 - muYW/muW*B2)
  
  return(list(estimate=estimate,
              score=score))
}

estCATT <- function(gamma,Pi,L,e_data,r,k){
  # gamma is N by 1 vector for type assignment
  
  # Pi is N by (1+p_pi) by T array of propensity score estimates
  # p_pi is the dimension of parameters in the propensity score model
  # the first column Pi is the propensity score ratio
  # the rest of the columns of Pi is the first derivative of the propensity score ratio
  
  # L is N by p_pi by T array of scores in the linear approximation for propensity score estimator
  
  # when there is no control covariate Xi, 
  # the every column except for the first column of Pi and all of L are zeros.   
  
  # e_data is N*T by (5+p) matrix for Y and X
  # p is the dimension of X
  # the order is Y, id, time, treatment timing, weight, X
  
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  E <- sapply(1:N, function(i) mean(e_data[e_data[,2]==i,4]))
  
  # find a vector of treatment timing that can estimate r-th lagged CATT for type k
  EAxis <- sort(unique(E[gamma==k]))
  EAxis <- EAxis[EAxis<=min(T-r,T)]
  
  # estimate mu(k,e) for each e and compute the vector of scores for the weighting over CATT
  muAxis <- rep(0,length(EAxis))
  scoreMat1 <- matrix(0,N,length(EAxis))
  for (i in 1:length(EAxis)){
    muAxis[i] <- sum(gamma==k & E==EAxis[i])/N
  }
  for (i in 1:length(EAxis)){
    scoreMat1[,i] <- ((gamma==k & E==EAxis[i]) - muAxis[i])/sum(muAxis) -
      muAxis[i]*((gamma==k & E<=max(EAxis)) - sum(muAxis))/(sum(muAxis))^2
  }
  
  # estimate CATTe+r(k,e) for each e and compute the vector of scores for CATT
  CATTAxis <- rep(0,length(EAxis))
  scoreMat2 <- matrix(0,N,length(EAxis))
  for (i in 1:length(EAxis)){
    est <- estFullCATT(gamma,Pi[,,EAxis[i]],L[,,EAxis[i]],e_data,EAxis[i]+r,k,EAxis[i])
    CATTAxis[i] <- est$estimate
    scoreMat2[,i] <- est$score
  }
  
  # sum CATT and combine the two scores
  estimate <- sum(CATTAxis*muAxis)/sum(muAxis)
  score <- apply(muAxis/sum(muAxis)*t(scoreMat2),2,sum) + apply(CATTAxis*t(scoreMat1),2,sum)
  
  return(list(estimate=estimate,
              score=score))
}